## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## here() starts at /Users/Jo/OneDrive/1_Hertie Studies/Thesis/Hertie-Thesis-Mehler
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## corrplot 0.92 loaded
## Rows: 1019 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): ResponseId, academic_status, educ_cat, gender, age_cat, polinteres...
## dbl (12): age, age10, polinterest, empathy_pc, exp_hate_speech, exp_hostile_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Indicator variables: keep complete rows only, then drop the 2 extreme
# outliers (text_length >= 2500 or readability_score >= 40).
# add cluster/type indicator later
data_indicators <- data %>%
  select(cluster, text_length, readability_score, leftright_pred_score) %>%
  drop_na() %>%
  filter(text_length < 2500 & readability_score < 40)

# Categorical versions of the control variables (complete cases only)
controls_cat <- data %>%
  select(
    academic_status, gender, age_cat, minority_cat, polinterest_cat_3,
    empathy_pc_cat, exp_hate_speech_cat, exp_hostile_engagement_cat
  ) %>%
  drop_na()

# Numerical versions of the control variables (complete cases only)
controls_num <- data %>%
  select(
    academic_status, age10, minority, polinterest,
    empathy_pc, exp_hate_speech, exp_hostile_engagement
  ) %>%
  drop_na()
# # if needed, look at the original huge sample controls instead of only my combined dataset
# controls <- read_csv(here("data/controls.csv"))
# # choose all controls only as numerics in order to check correlations?
# data_controls <- controls %>% select(-ResponseId, -gender, -age, -age_cat, -polinterest_cat_3, -empathy_pc_cat, -exp_hate_speech_cat, -exp_hostile_engagement_cat) %>% drop_na()
# select indicators in a different order (continuous variables first)
# Column order for the pairs plot: the three score columns first, cluster last
indicators <- c(
  "text_length",
  "readability_score",
  "leftright_pred_score",
  "cluster"
)
ggpairs(data = data_indicators, columns = indicators)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Pearson correlations among the numeric indicators only (cluster left out)
data_indicators_num <- select(
  data_indicators,
  text_length, readability_score, leftright_pred_score
)
cor_matrix <- cor(x = data_indicators_num, method = "pearson")
cor_matrix
## text_length readability_score leftright_pred_score
## text_length 1.0000000 0.42740196 0.13190421
## readability_score 0.4274020 1.00000000 0.08203617
## leftright_pred_score 0.1319042 0.08203617 1.00000000
# Spearman rank correlations among the numeric controls;
# academic_status is dropped before the matrix is computed.
controls_num_selection <- select(controls_num, -academic_status)
cor_matrix <- cor(x = controls_num_selection, method = "spearman")
cor_matrix
## age10 minority polinterest empathy_pc
## age10 1.00000000 -0.107416278 0.19565209 0.110195843
## minority -0.10741628 1.000000000 -0.03657133 0.009031454
## polinterest 0.19565209 -0.036571335 1.00000000 -0.080177227
## empathy_pc 0.11019584 0.009031454 -0.08017723 1.000000000
## exp_hate_speech -0.14542495 0.119937419 0.09873072 -0.123841485
## exp_hostile_engagement 0.09021549 0.117348119 0.28848369 0.063991034
## exp_hate_speech exp_hostile_engagement
## age10 -0.14542495 0.09021549
## minority 0.11993742 0.11734812
## polinterest 0.09873072 0.28848369
## empathy_pc -0.12384148 0.06399103
## exp_hate_speech 1.00000000 0.26899096
## exp_hostile_engagement 0.26899096 1.00000000
# Colour-coded lower triangle of the correlation matrix with the
# coefficients printed in each cell
corrplot(
  cor_matrix,
  method = "color",
  type = "lower",
  tl.cex = 1,
  tl.col = "black",
  addCoef.col = "black"
)
# Relationship between EDU and Experience with Online hostile Engagement
# (prop.c = TRUE adds column percentages to the counts)
crosstab(data$exp_hostile_engagement_cat, data$educ_cat, prop.c = TRUE)
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## =======================================================================
## data$educ_cat
## data$exp_hostile_engagement_cat High Intermediate Low Total
## -----------------------------------------------------------------------
## Less experience 241 77 69 387
## 41.9% 32.9% 34.3%
## -----------------------------------------------------------------------
## More experience 334 157 132 623
## 58.1% 67.1% 65.7%
## -----------------------------------------------------------------------
## Total 575 234 201 1010
## 56.9% 23.2% 19.9%
## =======================================================================
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## ================================================================
## data$educ_cat
## data$exp_hate_speech_cat High Intermediate Low Total
## ----------------------------------------------------------------
## Less experience 307 125 113 545
## 53.3% 53.4% 55.9%
## ----------------------------------------------------------------
## More experience 269 109 89 467
## 46.7% 46.6% 44.1%
## ----------------------------------------------------------------
## Total 576 234 202 1012
## 56.9% 23.1% 20.0%
## ================================================================
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## ==============================================================
## data$educ_cat
## data$polinterest_cat_3 High Intermediate Low Total
## --------------------------------------------------------------
## High 256 93 88 437
## 44.4% 39.7% 43.6%
## --------------------------------------------------------------
## Intermediate 227 110 76 413
## 39.4% 47.0% 37.6%
## --------------------------------------------------------------
## Low 93 31 38 162
## 16.1% 13.2% 18.8%
## --------------------------------------------------------------
## Total 576 234 202 1012
## 56.9% 23.1% 20.0%
## ==============================================================
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## ========================================================
## data$educ_cat
## data$polinterest High Intermediate Low Total
## --------------------------------------------------------
## 1 32 8 9 49
## 5.6% 3.4% 4.5%
## --------------------------------------------------------
## 2 61 23 29 113
## 10.6% 9.8% 14.4%
## --------------------------------------------------------
## 3 227 110 76 413
## 39.4% 47.0% 37.6%
## --------------------------------------------------------
## 4 256 93 88 437
## 44.4% 39.7% 43.6%
## --------------------------------------------------------
## Total 576 234 202 1012
## 56.9% 23.1% 20.0%
## ========================================================
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## ===========================================================
## data$educ_cat
## data$empathy_pc_cat High Intermediate Low Total
## -----------------------------------------------------------
## Less empathetic 322 121 91 534
## 56.3% 51.7% 45.3%
## -----------------------------------------------------------
## More empathetic 250 113 110 473
## 43.7% 48.3% 54.7%
## -----------------------------------------------------------
## Total 572 234 201 1007
## 56.8% 23.2% 20.0%
## ===========================================================
## Cell Contents
## |-------------------------|
## | Count |
## | Column Percent |
## |-------------------------|
##
## ======================================================
## data$educ_cat
## data$leftright High Intermediate Low Total
## ------------------------------------------------------
## 1 25 10 12 47
## 4.3% 4.3% 5.9%
## ------------------------------------------------------
## 2 19 9 9 37
## 3.3% 3.8% 4.5%
## ------------------------------------------------------
## 3 61 11 17 89
## 10.6% 4.7% 8.4%
## ------------------------------------------------------
## 4 44 24 12 80
## 7.6% 10.3% 5.9%
## ------------------------------------------------------
## 5 62 16 22 100
## 10.8% 6.8% 10.9%
## ------------------------------------------------------
## 6 131 45 47 223
## 22.7% 19.2% 23.3%
## ------------------------------------------------------
## 7 66 33 29 128
## 11.5% 14.1% 14.4%
## ------------------------------------------------------
## 8 68 30 23 121
## 11.8% 12.8% 11.4%
## ------------------------------------------------------
## 9 47 27 11 85
## 8.2% 11.5% 5.4%
## ------------------------------------------------------
## 10 15 15 9 39
## 2.6% 6.4% 4.5%
## ------------------------------------------------------
## 11 38 14 11 63
## 6.6% 6.0% 5.4%
## ------------------------------------------------------
## Total 576 234 202 1012
## 56.9% 23.1% 20.0%
## ======================================================
# Plot the relationship between every unique pair of columns in a data frame.
#
# Each pair (column i on the x-axis, column j > i on the y-axis) is drawn as a
# jittered scatter plot with a linear trend line fitted by geom_smooth().
#
# Args:
#   df: A data frame. Columns are paired in their existing order, so the last
#       column never appears on the x-axis.
#
# Returns:
#   The list of ggplot objects, invisibly. Every plot is printed once as a
#   side effect. Returning invisibly prevents a top-level call from
#   auto-printing the list and rendering each plot a second time.
plot_relationships <- function(df) {
  col_names <- names(df)

  # Fewer than two columns means there is nothing to pair up
  if (length(col_names) < 2) {
    return(invisible(list()))
  }

  # combn() yields pairs in the order (1,2), (1,3), ..., (2,3), ...
  column_pairs <- combn(col_names, 2, simplify = FALSE)

  plots <- lapply(column_pairs, function(column_pair) {
    x <- column_pair[[1]]
    y <- column_pair[[2]]

    # .data[[name]] replaces the deprecated aes_string(); axis labels are set
    # explicitly so they show the plain column names
    ggplot(df, aes(x = .data[[x]], y = .data[[y]])) +
      geom_point(
        position = position_jitter(width = 0.2, height = 0.2),
        alpha = 0.6,
        color = "skyblue"
      ) +
      geom_smooth(method = "lm", colour = "black", linewidth = 0.5) +
      theme_minimal() +
      labs(
        title = paste("Scatter plot between", x, "and", y),
        x = x,
        y = y
      )
  })

  # Render each plot exactly once
  for (single_plot in plots) {
    print(single_plot)
  }

  invisible(plots)
}
# use only categorical variables
# Drop academic_status, then plot every remaining pair of categorical controls
controls_cat <- select(controls_cat, -academic_status)
plot_relationships(df = controls_cat)
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## [[1]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[2]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[3]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[4]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[5]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[6]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[7]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[8]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[9]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[10]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[11]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[12]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[13]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[14]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[15]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[16]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[17]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[18]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[19]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[20]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[21]]
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## [[1]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[2]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[3]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[4]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[5]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[6]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[7]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[8]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[9]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[10]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[11]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[12]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[13]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[14]]
## `geom_smooth()` using formula = 'y ~ x'
##
## [[15]]
## `geom_smooth()` using formula = 'y ~ x'